PBJ 야구

데이터 전처리

# Read the KBO pitching dataset from the working directory.
df <- read.csv(file = "kbopitchingdata.csv")
# View(df)
# Print the type and first few values of every column.
str(object = df)
'data.frame':   323 obs. of  34 variables:
 $ id                 : int  1 2 3 4 5 6 7 8 9 10 ...
 $ year               : int  2021 2021 2021 2021 2021 2021 2021 2021 2021 2021 ...
 $ team               : chr  "LG Twins" "KT Wiz" "Doosan Bears" "Samsung Lions" ...
 $ average_age        : num  26.3 28.4 27.5 28.8 27.7 25.8 27.3 27 25.3 27.1 ...
 $ runs_per_game      : num  3.9 4.06 4.57 4.57 4.8 4.89 5.13 5.13 5.22 5.64 ...
 $ wins               : int  72 75 70 75 67 69 66 49 58 64 ...
 $ losses             : int  57 59 65 59 67 67 63 82 75 71 ...
 $ win_loss_percentage: num  0.558 0.56 0.519 0.56 0.5 0.507 0.512 0.374 0.436 0.474 ...
 $ ERA                : num  3.57 3.67 4.28 4.29 4.5 4.33 4.8 4.67 4.89 5.39 ...
 $ run_average_9      : num  3.96 4.17 4.66 4.7 4.95 5.02 5.2 5.29 5.33 5.75 ...
 $ games              : int  143 143 143 143 143 143 143 143 143 143 ...
 $ games_started      : int  143 143 143 143 143 143 143 143 143 143 ...
 $ games_finished     : int  143 141 141 141 140 142 143 142 142 142 ...
 $ complete_game      : int  0 2 2 2 3 1 0 1 1 1 ...
 $ shutouts           : int  18 6 10 14 10 7 5 6 6 12 ...
 $ saves              : int  32 33 27 46 33 30 25 21 36 36 ...
 $ innings_pitched    : num  1264 1255 1260 1250 1247 ...
 $ hits               : int  1117 1166 1288 1287 1256 1276 1283 1200 1274 1330 ...
 $ runs               : int  557 581 653 653 686 699 733 734 746 806 ...
 $ earned_runs        : int  501 512 599 596 624 604 676 648 684 756 ...
 $ home_runs          : int  79 85 104 129 122 100 147 114 133 132 ...
 $ walks              : int  542 486 586 526 585 566 623 669 616 653 ...
 $ intentional_walks  : int  17 18 16 13 14 27 27 22 5 19 ...
 $ strikeouts         : int  1062 1051 1037 1031 1046 893 1006 1006 946 1047 ...
 $ hit_batter         : int  97 42 73 51 77 80 78 101 104 86 ...
 $ balks              : int  5 1 7 3 8 4 9 11 9 5 ...
 $ wild_pitches       : int  43 56 51 56 74 58 40 56 58 102 ...
 $ batters_faced      : int  5416 5359 5596 5496 5575 5568 5661 5633 5658 5722 ...
 $ WHIP               : num  1.31 1.32 1.49 1.45 1.48 ...
 $ hits_9             : num  8 8.4 9.2 9.3 9.1 9.2 9.1 8.6 9.1 9.5 ...
 $ homeruns_9         : num  0.6 0.6 0.7 0.9 0.9 0.7 1 0.8 1 0.9 ...
 $ walks_9            : num  3.9 3.5 4.2 3.8 4.2 4.1 4.4 4.8 4.4 4.7 ...
 $ strikeouts_9       : num  7.6 7.5 7.4 7.4 7.5 6.4 7.1 7.2 6.8 7.5 ...
 $ strikeout_walk     : num  1.96 2.16 1.77 1.96 1.79 1.58 1.61 1.5 1.54 1.6 ...
# Drop columns that contain missing values and are not needed afterwards.
df_01 <- subset(
  df,
  select = -c(
    games_started, games_finished, intentional_walks, balks, wild_pitches
  )
)

# Rename teams whose name changed over the years to a single franchise name.
# A vectorised lookup replaces the row-by-row if/else chain: it is safe on an
# empty data frame (no `1:0` iteration) and leaves NA or unlisted team names
# untouched instead of failing inside `if ()`.
current_team_names <- c(
  "MBC Blue Dragons" = "LG Twins",
  "OB Bears" = "Doosan Bears",
  "Nexen Heroes" = "Kiwoom Heroes",
  "Woori Heroes" = "Kiwoom Heroes",
  "SK Wyverns" = "SSG Landers",
  "Binggre Eagles" = "Hanwha Eagles",
  "Haitai Tigers" = "Kia Tigers",
  "Pacific Dolphins" = "Hyundai Unicorns",
  "Chungbo Pintos" = "Hyundai Unicorns",
  "Sammi Superstars" = "Hyundai Unicorns"
)
# Position of each team in the lookup table; NA when no rename applies.
rename_idx <- match(df_01$team, names(current_team_names))
has_rename <- !is.na(rename_idx)
df_01$team[has_rename] <- unname(current_team_names[rename_idx[has_rename]])

KBO 리그 선수들의 나이 : Horizontal Violin Graph

https://r-graph-gallery.com/violin_horizontal_ggplot2.html

# Libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(hrbrthemes)
library(viridis)
library(plotly)

# library(extrafont)
# font_import(paths=NULL, recursive = TRUE, prompt=TRUE, pattern=NULL) # "윈도우즈 폰트데이터베이스에서 찾을 수 없는 폰트페밀리입니다" 오류 해결, 30분 이상 소요

# Plot

# Horizontal violin plot: distribution of `average_age` for each team.
p <- df_01 |>
  ggplot(aes(x = team, y = average_age, fill = team, color = team)) +
  # `linewidth` sets the outline thickness; using `size` for lines is
  # deprecated as of ggplot2 3.4.0.
  geom_violin(width = 1.6, linewidth = 0.5) +
  scale_fill_viridis(discrete = TRUE) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(legend.position = "none") +
  # Swap the x and y axes to get the horizontal version of the plot.
  coord_flip() +
  xlab("") +
  ylab("age")

p

https://plotly.com/r/violin/

library(plotly)

# Interactive violin plot of `average_age` per team, one trace per team, with
# an embedded box plot and a mean line. `TRUE`/`FALSE` are spelled out because
# `T`/`F` are ordinary variables that can be reassigned.
fig <- df_01 %>%
  plot_ly(
    x = ~team,
    y = ~average_age,
    split = ~team,
    type = "violin",
    box = list(visible = TRUE),
    meanline = list(visible = TRUE)
  ) %>%
  layout(
    xaxis = list(title = "KBO TEAM"),
    yaxis = list(title = "AGE", zeroline = FALSE)
  )

fig

https://plotly.com/r/cumulative-animations/

library(plotly)

# Expand a data frame into cumulative animation frames.
#
# Arguments:
#   dat  A data frame.
#   var  A one-sided formula (e.g. `~year`) evaluated inside `dat`; its
#        distinct values define the frames.
#
# Returns a data frame in which, for the k-th level of `var`, all rows whose
# value is among the first k levels are repeated with a `frame` column set to
# that level. Frames are stacked in level order.
accumulate_by <- function(dat, var) {
  var <- lazyeval::f_eval(var, dat)
  # Factor levels, otherwise the sorted unique values. This is computed inline
  # rather than through the unexported `plotly:::getLevels` helper, which is
  # not part of plotly's public API.
  lvls <- if (is.factor(var)) levels(var) else sort(unique(var))
  dats <- lapply(seq_along(lvls), function(x) {
    # Rows belonging to the first `x` levels, tagged with the current level.
    cbind(dat[var %in% lvls[seq_len(x)], ], frame = lvls[[x]])
  })
  dplyr::bind_rows(dats)
}

# Animated line chart of `average_age` by year for six selected teams.
# Rows after 1981 are kept and expanded into cumulative frames so that each
# animation step draws the lines up to that year.
fig <- df_01 %>%
  filter(
    year > 1981,
    team %in% c(
      "Doosan Bears", "LG Twins", "Samsung Lions",
      "Hanwha Eagles", "Kia Tigers", "Lotte Giants"
    )
  ) %>%
  accumulate_by(~year)

fig <- fig %>%
  plot_ly(
    x = ~year,
    y = ~average_age,
    split = ~team,
    frame = ~frame,
    type = "scatter",
    mode = "lines",
    # The attribute is spelled `simplify`; the previous `simplyfy` was not a
    # valid line attribute, so the setting never took effect.
    line = list(simplify = FALSE)
  ) %>%
  layout(
    xaxis = list(title = "Date", zeroline = FALSE),
    yaxis = list(title = "average_age", zeroline = FALSE)
  ) %>%
  animation_opts(
    frame = 100,
    transition = 0,
    redraw = FALSE
  ) %>%
  animation_slider(
    currentvalue = list(prefix = "year")
  )

fig

KBO 리그 승률, 승 : Stacked Funnel Plot

https://plotly.com/r/funnel-charts/

# library(showtext) # 다운로드 없이 구글 제공 폰트 사용
# font_add_google("Gochi Hand", "gochi")

# Need to install plotly from Github to get funnel plots
# devtools::install_github("ropensci/plotly")

library(plotly)
# Keep only the 2021 season and sort the teams by wins, highest first.
df_2021 <- df_01 |>
  subset(year == 2021) |>
  arrange(desc(wins))

# Stacked funnel: one trace for the win-loss percentage (scaled to 0-100) and
# one for the number of wins, both keyed by team name.
fig <- plot_ly(
  type = "funnel",
  name = "win_loss_percentage",
  y = df_2021$team,
  x = df_2021$win_loss_percentage * 100,
  textposition = "inside",
  textinfo = "value+percent total"
) %>%
  add_trace(
    type = "funnel",
    name = "wins",
    y = df_2021$team,
    x = df_2021$wins,
    textposition = "inside",
    textinfo = "value+percent total"
  ) %>%
  # `categoryarray` must list the category labels themselves. The previous
  # value `c(1:6)` matched none of the team names, so it had no effect.
  layout(yaxis = list(categoryarray = unique(df_2021$team)))

fig

KBO 선수들의 실책 : 3D Bubble Plot

https://plotly.com/r/3d-scatter-plots/

library(plotly)

# Order the 2021 rows alphabetically by team name.
df_2021 <- df_2021[order(df_2021$team), ]

# 3D bubble chart: runs (x), hits (y) and walks (z) per team. Marker colour
# encodes the team and marker diameter scales with wins.
fig <- plot_ly(
  df_2021,
  x = ~runs, y = ~hits, z = ~walks,
  color = ~team, size = ~wins,
  marker = list(symbol = "circle", sizemode = "diameter"),
  sizes = c(25, 50),
  text = ~paste('runs : ', runs, '<br>hits : ', hits, '<br> walks :', walks, '<br> wins : ', wins)
)

fig <- fig %>% layout(
  title = '선수들의 실책',
  scene = list(
    xaxis = list(
      title = "runs",
      gridcolor = "rgb(255, 255, 255)",
      type = "log",
      zerolinewidth = 1,
      ticklen = 5,
      gridwidth = 2
    ),
    # `gridwidth` was misspelled as `gridwith` on the y and z axes, so the
    # grid line width was never applied there.
    yaxis = list(
      title = "hits",
      gridcolor = "rgb(255, 255, 255)",
      zerolinewidth = 1,
      ticklen = 5,
      gridwidth = 2
    ),
    zaxis = list(
      title = "walks",
      gridcolor = "rgb(255, 255, 255)",
      type = "log",
      zerolinewidth = 1,
      ticklen = 5,
      gridwidth = 2
    )
  ),
  paper_bgcolor = "rgb(243, 243, 243)",
  plot_bgcolor = "rgb(243, 243, 243)"
)

fig

rayshader

https://www.rayshader.com/index.html

# To install the latest version from Github:
# install.packages("devtools")
# devtools::install_github("tylermorganwall/rayshader")
library(devtools)

# install.packages("rayrender")
library(rayrender)

## 
## Attaching package: 'rayrender'

## The following object is masked from 'package:rgl':
## 
##     text3d
# install.packages("rgl")
library(rgl)

# install.packages("rayshader")
library(rayshader)

# install.packages("rgdal")
library(rgdal)

library(raster)

library(viridis)
# 2D density contours of the ggplot2 `diamonds` data (x vs. depth), faceted by
# clarity. `after_stat()` replaces the deprecated `stat()` for referring to the
# computed `nlevel` variable.
ggdiamonds <- ggplot(diamonds) +
  stat_density_2d(
    aes(x = x, y = depth, fill = after_stat(nlevel)),
    geom = "polygon", n = 200, bins = 50, contour = TRUE
  ) +
  facet_wrap(clarity ~ .) +
  scale_fill_viridis_c(option = "A")

# Use a 1x2 plotting grid and keep the previous settings for restoring later.
old_par <- par(mfrow = c(1, 2))

# First call: flat 2D preview only. Second call: build the 3D scene.
plot_gg(ggdiamonds, width = 5, height = 5, raytrace = FALSE, preview = TRUE)
plot_gg(ggdiamonds, width = 5, height = 5, multicore = TRUE, scale = 250,
        zoom = 0.7, theta = 10, phi = 30, windowsize = c(800, 800))
# Short pause before capturing the snapshot of the 3D scene.
Sys.sleep(0.2)
render_snapshot(clear = TRUE)

# Restore the graphics parameters changed above.
par(old_par)